rm(list=ls())
setwd("E:/data/cooperation/democracy/data") #set the working file
#load the packages
library(foreign)
library(xlsxjars)
library(xlsx)
library(ggplot2)
library(gridExtra)
library(readstata13)
#load the original data
infdata=read.csv("original.csv")
infdata2 <- na.omit(infdata)
infdata2<-infdata2[,-c(1,2)]
library(randomForest)  
system.time(Randommodel <- randomForest(fh_ipolity2~ ., data=infdata2,importance = TRUE, proximity = FALSE, ntree = 100))  
print(Randommodel)
imp<-importance(Randommodel,type=2)  #Gini index

imp<-imp[order(imp,decreasing=T),]
write.csv(imp,file="importance.csv")
varImpPlot(Randommodel)         #visualization
country<-names(table(infdata$ccodealp))
write.csv(country,file="country.csv")
#load the data
da<-read.dta("data.dta")

##statistical analysis----
summary(da)
nam<-names(da)
op<-par(mfrow = c(3, 3),mai=c(0.3,0.3,0.3,0.3))
l=c(3:9,10:11)
## histogram of the variables
for (i in 1:9)
{
  hist(da[,l[i]],freq = F,main=nam[l[i]],xlab=NA,breaks = 30)
  curve(dnorm(x,mean(da[,l[i]],na.rm = T),sd(da[,l[i]],na.rm = T)),add=T,col="red")
}

ggplot(da,aes(x=internetuse))+
  geom_point(aes(y=polity))+
  geom_smooth(aes(y=polity),method="loess",color=alpha("red",0.5),size=1)+
  facet_wrap(~year)+
  theme_bw()+
  theme(strip.background=element_rect(fill="gray92"))+
  theme(panel.grid=element_blank(),panel.border=element_blank(),plot.title = element_text(hjust = 0.5),axis.line = element_line())+
  labs(title="Fig S1: The Relationship of polity and internetuser in Years")

ggplot(da,aes(x=internetuse))+
  geom_point(aes(y=polity,color=year))+
  geom_smooth(aes(y=polity),method="loess",color=alpha("red",0.5),size=1)+
  scale_colour_gradient(low = "grey",high = "black")+
  theme_bw()+
  theme(panel.grid=element_blank(),panel.border=element_blank(),plot.title = element_text(hjust = 0.5),axis.line = element_line())+
  labs(title="Fig1: The Relationship of polity and internetuser")

##slip the data into two groups of high and low democracy
country<-names(table(da$ccodealp))
FirstQu_polity<-vector(length=length(country))
Median_polity<-vector(length=length(country))
Mean_polity<-vector(length=length(country))
ThirdQu_polity<-vector(length=length(country))
group<-vector(length=length(country))
da_polity<-data.frame(country,FirstQu_polity,Median_polity,Mean_polity,ThirdQu_polity)
for (i in 1:length(country))
{
  d<-summary(da[which(da$ccodealp==da_polity[i,1]),3])
  da_polity[i,2]<-d[[2]]
  da_polity[i,3]<-d[[3]]
  da_polity[i,4]<-d[[4]]
  da_polity[i,5]<-d[[5]]
}
me<-mean(da_polity$Median_polity)
da_polity<-data.frame(da_polity,group=group)
for (i in 1:length(country))
{
  da_polity$group<-as.numeric(da_polity$Median_polity>me)
}
highcountry<-da_polity[da_polity$group==1,1]
lowcountry<-da_polity[da_polity$group==0,1]
#draw the figure 3
ggplot(da_polity,aes(Median_polity))+
  geom_histogram(aes(y=..density..),bins = 20,fill="white",color="black")+
  stat_density(geom='line',position='identity',size=1,color="red")+
  theme_bw()+
  theme(panel.grid=element_blank(),panel.border=element_blank(),plot.title = element_text(hjust = 0.5,size=9),axis.line = element_line())+
  labs(title="Fig 3: The Histogram of Medium_polity in different countries")
group<-vector(length=dim(da)[1])
da<-data.frame(da,group)
ma<-match(highcountry,da$ccodealp)
for (i in 1:length(ma))
{
  da$group[ma[i]:(ma[i]+21)]<-1
}
ma<-match(lowcountry,da$ccodealp)
for (i in 1:length(ma))
{
  da$group[ma[i]:(ma[i]+21)]<-0
}
names(da)[2]<-"countrycode"
write.dta(da,file="da.dta")

##trend---
x<-seq(0,10,0.01)
y=0.0665*x-0.0078*x^2
a1<-c(0,0,10)
a2<-c(0,0,0)
a1[2]<-0.0665/0.0078/2
a2[3]<-0.0665*10-0.0078*10^2
a2[2]<-0.0665*a1[2]-0.0078*a1[2]^2
da1<-data.frame(x=a1,y=a2)
da<-data.frame(internetuses=x,polity=y)
#draw the figure 2
ggplot(da,aes(x=internetuses,y=polity))+
  geom_line(color="cyan4",size=1)+
  theme_bw()+
  theme(panel.grid=element_blank(),panel.border=element_blank(),plot.title = element_text(hjust = 0.5),axis.line = element_line())+
  geom_hline(yintercept=a2[2],xlimits=c(0,3))+
  scale_size(range = c(0,a1[2]))+
  geom_vline(xintercept=a1[1],linetype="dotted")+
  geom_vline(xintercept=a1[2],linetype="dotted")+
  geom_vline(xintercept=10,linetype="dotted")+
  geom_hline(yintercept=0)

##add the political system and press data-----
da<-read.dta("da.dta")
da1<-read.dta("press.dta")
da2<-read.dta("polit_sys.dta")
da<-merge(da, da1, by = c("year", "countrycode"),all.x = T)
da<-merge(da, da2, by = c("year", "countrycode"),all.x = T)
###change the political system
da$polit_sys[which(da$polit_sys==1)]<-0
da$polit_sys[which(da$polit_sys==2)]<-1
da$polit_sys[which(da$countrycode=="TGO")]<-0
da$polit_sys[which(da$countrycode=="PAK")]<-0
da$polit_sys[which(da$countrycode=="NPL")]<-1
da$polit_sys[which(da$countrycode=="HRV")]<-1
da$polit_sys[which(da$countrycode=="ISR")]<-1
da$polit_sys[which(da$countrycode=="KHM")]<-1
da$polit_sys[which(da$countrycode=="LVA")]<-1
write.dta(da,file="fdata.dta")



###linear quantile mixed model
library(reshape2)
da1<-read.dta("da.dta")
da<-read.csv("result.csv",header = F) #regression resutl from stata
colnames(da)<-seq(0.01,0.99,0.01)
da<-da[seq(1,18,2),]
rownames(da)<-c("internetuse","sqrinter","gdp","wdi_pop65","wbgi_cce","wbgi_vae","wbgi_pse","wdi_expmilgdp","constant")
quan<-seq(0.01,0.99,0.01)
da<-t(da)
da<-data.frame(democracy=quantile(da1$polity,quan),da)
write.dta(da,file="result.dta")
da<-melt(da,id="democracy")

da1<-da[which(da$variable=="internetuse"),]
#draw the figure 4
ggplot(da1,aes(x=democracy,y=value))+
  geom_point(size=1)+
  geom_smooth(method = "loess")+
  geom_hline(yintercept=0.1207,color="red")+
  theme_bw()+
  theme(panel.grid=element_blank(),panel.border=element_blank(),plot.title = element_text(hjust = 0.5,size=13),axis.line = element_line())+
  labs(title="Internet Penetration",y="coefficient")+
  scale_colour_gradient(low = "green", high = "blue")+
  scale_fill_discrete(labels=c("Control", "Type 1\ntreatment",
                               "Type 2\ntreatment"))



da2<-da[which(da$variable=="wbgi_cce"),]
q2<-ggplot(da2,aes(x=democracy,y=value))+
  geom_point(size=1)+
  geom_smooth(method = "loess")+
  theme_bw()+
  theme(panel.grid=element_blank(),panel.border=element_blank(),plot.title = element_text(hjust = 0.5,size=10),axis.line = element_line())+
  labs(title="Control of Corruption",y="coefficient")+
  scale_colour_gradient(low = "green", high = "blue")

da3<-da[which(da$variable=="wbgi_vae"),]
q3<-ggplot(da3,aes(x=democracy,y=value))+
  geom_point(size=1)+
  geom_smooth(method = "loess")+
  theme_bw()+
  theme(panel.grid=element_blank(),panel.border=element_blank(),plot.title = element_text(hjust = 0.5,size=10),axis.line = element_line())+
  labs(title="Voice and Accountability",y="coefficient")+
  scale_colour_gradient(low = "green", high = "blue")

da4<-da[which(da$variable=="wbgi_pse"),]
q4<-ggplot(da4,aes(x=democracy,y=value))+
  geom_point(size=1)+
  geom_smooth(method = "loess")+
  theme_bw()+
  theme(panel.grid=element_blank(),panel.border=element_blank(),plot.title = element_text(hjust = 0.5,size=10),axis.line = element_line())+
  labs(title="Political Stability",y="coefficient")+
  scale_colour_gradient(low = "green", high = "blue")

da5<-da[which(da$variable=="gdp"),]
q5<-ggplot(da5,aes(x=democracy,y=value))+
  geom_point(size=1)+
  geom_smooth(method = "loess")+
  theme_bw()+
  theme(panel.grid=element_blank(),panel.border=element_blank(),plot.title = element_text(hjust = 0.5,size=10),axis.line = element_line())+
  labs(title="Ln GDP per Capita",y="coefficient")+
  scale_colour_gradient(low = "green", high = "blue")
grid.arrange(q2,q3,q4,q5) ##draw the figure 5









